#install.packages("lubridate")
#install.packages("ggplot2")
#install.packages("forecast")
#install.packages("here")
#install.packages("knitr")
#install.packages("kableExtra")
#install.packages("dplyr")
#install.packages("openxlsx")
#install.packages("ggthemes")
#install.packages("tidyr")
#install.packages("GGally")
#install.packages("tseries")
#install.packages("blorr")
#install.packages("car")
#install.packages("corrplot")
#install.packages("dlm")
#install.packages("randomForest")
#install.packages("Kendall")
library(lubridate)
library(ggplot2)
library(forecast) 
library(here)
library(knitr)
library(kableExtra)
library(dplyr)
library(openxlsx)
library(ggthemes)
library(tidyr)
library(Kendall)
library(GGally)
library(trend)
library(tseries)
library(blorr)   
library(lmtest) 
library(car)
library(cowplot)
library(corrplot)
here()
raw_water<-read.csv(here('Data/Processed/groundwater_processed.csv'),stringsAsFactors = TRUE,skip=0,header=TRUE)
raw_preci<-read.csv(here('Data/Processed/preci_processed.csv'),stringsAsFactors = TRUE,skip=0,header=TRUE)
raw_temp<-read.csv(here('Data/Processed/temp_processed.csv'),stringsAsFactors = TRUE,skip=0,header=TRUE)

raw_water<-raw_water[,2:10]
raw_preci<-raw_preci[,2:3]
raw_temp<-raw_temp[,2:3]
#date format 
raw_water$Date<-ymd(raw_water$Date)
raw_temp$Date<-ymd(raw_temp$Date)
raw_preci$Date<-ymd(raw_preci$Dat)

#daily average 
df_water<-raw_water %>%
  group_by(Date) %>%
  summarize(water=mean(Groundwater_level, na.rm=TRUE)) 

#check timeframe 
print(head(df_water$Date,1)) #1993-02-01
## [1] "1990-01-16"
print(tail(df_water$Date,1)) #2022-01-26
## [1] "2022-01-26"
print(head(raw_temp$Date,1)) #1990-01-01
## [1] "1990-01-01"
print(tail(raw_temp$Date,1)) #2024-03-01
## [1] "2024-03-01"
#monthly average 
df_water_monthly<-df_water %>%
  group_by(month=format(Date, "%Y-%m")) %>%
  summarize(monthly_avg = mean(water, na.rm = TRUE))


#merge precipitation dataframe and temperature dataframe 
raw_temp_preci<-left_join(raw_temp,raw_preci,by="Date")

#missing data
#create time dataframe
start_time<-ymd("1993-02-01")
end_time<-ymd("2022-01-26")
df_time_daily<-data.frame(Date=seq(from=start_time, to=end_time, by="1 day"))
df_time_monthly<-data.frame(Date=seq(from=start_time, to=end_time, by="1 month"))

#merge dataframe
df_water_merge<-left_join(df_time_daily,df_water,by="Date")
df_temp_preci_merge<-left_join(df_time_monthly,raw_temp_preci,by="Date")
head(df_water_merge)
##         Date water
## 1 1993-02-01    NA
## 2 1993-02-02    NA
## 3 1993-02-03    NA
## 4 1993-02-04    NA
## 5 1993-02-05    NA
## 6 1993-02-06    NA
head(df_temp_preci_merge)
##         Date Temperature Precipitation
## 1 1993-02-01        52.4          2.46
## 2 1993-03-01        59.0          1.47
## 3 1993-04-01        66.6          0.00
## 4 1993-05-01        76.7          0.48
## 5 1993-06-01        83.2          0.01
## 6 1993-07-01        86.3          0.57
#filter missing data
df_water_missing<-df_water_merge %>%
  filter(is.na(water))  #continuous data since 2001-03-01
df_temp_preci_merge %>%
  filter(is.na(Temperature)) #no missing value 
## [1] Date          Temperature   Precipitation
## <0 rows> (or 0-length row.names)
#drop the missing value 
df_water<-df_water_merge %>%
  filter(Date>=ymd("2001-03-01") & Date<=ymd("2021-10-22"))
df_temp_preci<-df_temp_preci_merge %>%
  filter(Date>=ymd("2001-03-01")& Date<=ymd("2021-10-22"))

df_temp_preci2<-df_temp_preci_merge %>%
  filter(Date>=ymd("2002-02-01")& Date<=ymd("2021-10-22"))
#water is daily data
#temp and preci are monthly data
#plotting original data

plot(df_water[,2], type = "l")

plot
## function (x, y, ...) 
## UseMethod("plot")
## <bytecode: 0x7ff63346b0e8>
## <environment: namespace:base>
#monthly average 
df_water_monthly<-df_water %>%
  group_by(month=format(Date, "%Y-%m")) %>%
  summarize(monthly_avg = mean(water, na.rm = TRUE))

df_water_monthly$month <- ym(df_water_monthly$month)

df_water_monthly <- df_water_monthly %>%
  filter(month>=as.Date("2002-02-01"))

#water
#msts_water<-msts(df_water[,2],seasonal.periods=c(7,365),start =c(2001,03), end=c(2021,10))
ts_water_monthly<-ts(df_water_monthly[,2], frequency=12,start =c(2002,02), end=c(2021,10))
#autoplot(msts_water)
autoplot(ts_water_monthly)

#question
#ts_water_monthly<-ts(df_water_monthly[,2], frequency=12,start =c(2010,01), end=c(2021,10))

#water
par(mfrow=c(1,2))
Acf(ts_water_monthly,lag.max=40,main=paste("ACF for Groundwater"),ylim=c(-0.5,1)) 
Pacf(ts_water_monthly,lag.max=40,main=paste("PACF for Groundwater"),ylim=c(-0.5,1))

plot_grid(
  autoplot(ts_water_monthly),
  autoplot(Acf(ts_water_monthly,plot=FALSE,lag.max=40)),
  autoplot(Pacf(ts_water_monthly,plot=FALSE,lag.max=40)),
  nrow=1
)

#decompose 
water_decompose<-decompose(ts_water_monthly)
plot(water_decompose)

#deseason
water_deseason<-seasadj(water_decompose)
plot_grid(
  autoplot(water_deseason),
  autoplot(Acf(water_deseason,plot=FALSE,lag.max=40)),
  autoplot(Pacf(water_deseason,plot=FALSE,lag.max=40)),
  nrow=1
)

par(mfrow=c(1,2))
Acf(water_deseason,lag.max=40,main=paste("ACF for Groundwater"),ylim=c(-0.5,1)) 
Pacf(water_deseason,lag.max=40,main=paste("PACF for Groundwater"),ylim=c(-0.5,1))

#ts; monthly 
#Mann-Kendall
summary(MannKendall(water_deseason)) #reject the null hypothesis, supporting the presence of a trend
## Score =  808 , Var(Score) = 1488413
## denominator =  27966
## tau = 0.0289, 2-sided pvalue =0.50831
#seasonal Mann-Kendall
#correction: calculate monthly average 
trend::smk.test(water_deseason) 
## 
##  Seasonal Mann-Kendall trend test (Hirsch-Slack test)
## 
## data:  water_deseason
## z = 0.53391, p-value = 0.5934
## alternative hypothesis: true S is not equal to 0
## sample estimates:
##     S  varS 
##    57 11001
#ADF test
print(adf.test(water_deseason,alternative = "stationary")) #reject the null hypothesis, the trend is stationary 
## 
##  Augmented Dickey-Fuller Test
## 
## data:  water_deseason
## Dickey-Fuller = -3.0006, Lag order = 6, p-value = 0.1552
## alternative hypothesis: stationary
#not sure why we are running it on deseasoned data here since this test can handle seasonality

print(adf.test(ts_water_monthly,alternative = "stationary"))
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_water_monthly
## Dickey-Fuller = -3.0442, Lag order = 6, p-value = 0.1368
## alternative hypothesis: stationary
#p-value>0.05 so null hypothesis cant be rejected and trend is stochastic

SMKtest<- SeasonalMannKendall(ts_water_monthly)
print(summary(SMKtest))
## Score =  57 , Var(Score) = 11001
## denominator =  2223
## tau = 0.0256, 2-sided pvalue =0.58682
## NULL
#ts; monthly
#find out how many times is needed for differencing 
#ns_diff<-nsdiffs(water_deseason)
#cat("Number of seasonal differencing needed:", ns_diff) #no need to difference the series
#question: does not need to difference the series but has trend in the data

#this is no need for seasonal differencing

n_diff<- ndiffs(ts_water_monthly)
cat("Number of trend differencing needed:", n_diff)
## Number of trend differencing needed: 1
#differencing is still needed for the stationary trend, d=1
#water
#from 2001-03-01 to 2021-10-22

#monthly ts
#ts_training<-ts(df_water_monthly[,2],
                #frequency=12,
                #start=c(2002,02), end=c(2020,09))

#I'm not sure why the start=c(2010,01) says 2010. Shouldn't it be 2001?


#ts_testing<-ts(df_water_monthly[,2],
               #frequency=12,
               #start=c(2020,10), end=c(2021,10))

nobs <- nrow(df_water_monthly)
nfor <- 12

ts_training2 <- subset(ts_water_monthly, end=length(ts_water_monthly)-nfor)
ts_testing2 <- subset(ts_water_monthly, start = length(ts_water_monthly)-nfor+1)

autoplot(ts_training2)

autoplot(ts_testing2)

#ts(ts_water_monthly[1:(nobs-nfor)],
#start=c(2002,02),
#frequency = 12)
  
#ts_water_monthly[(nobs-nfor+1):nobs]
#head(ts_testing2)
par(mfrow=c(1,1))

#SARIMA

sarima_fit <- auto.arima(ts_training2)
checkresiduals(sarima_fit)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,1,2)
## Q* = 20.844, df = 21, p-value = 0.4685
## 
## Model df: 3.   Total lags used: 24
sarima_for <- forecast(sarima_fit,h=nfor)
plot(sarima_for)

sarima_score <- accuracy(sarima_for$mean, ts_testing2)
ETS_fit <- stlf(ts_training2,h=nfor)

ETS_score <- accuracy(ETS_fit$mean, ts_testing2)

autoplot(ts_water_monthly)+
  autolayer(ETS_fit, series="STL +ETS", PI=FALSE)+
  autolayer(sarima_for, series = "SARIMA", PI=FALSE)

SS_ll <- StructTS(ts_training2, type="level")
SS_llt <- StructTS(ts_training2, type="trend")
SS_bsm <- StructTS(ts_training2, type="BSM")

SS_ll_for <- forecast(SS_ll,h=nfor)
plot(SS_ll_for)

SS_ll_score <- accuracy(SS_ll_for$mean,ts_testing2)

SS_llt_for <- forecast(SS_llt,h=nfor)
plot(SS_llt_for)

SS_llt_score <- accuracy(SS_llt_for$mean,ts_testing2)

SS_bsm_for <- forecast(SS_bsm,h=nfor)
plot(SS_bsm_for)

SS_bsm_score <- accuracy(SS_bsm_for$mean,ts_testing2)

#ETS is better if looking at MAPE
#ARIMA +Fourier
ARIMA_Four_fit_1<-auto.arima(ts_training2,
                             seasonal=FALSE, 
                             xreg=fourier(ts_training2,K=c(2)))
#forecast with ARIMA_fit
ARIMA_Four_forecast_1<-forecast(ARIMA_Four_fit_1,
                                xreg=fourier(ts_training2,K=c(2),h=nfor),
                                h=nfor) 

#K=c(4)
ARIMA_Four_fit_2<-auto.arima(ts_training2,
                             seasonal=FALSE, 
                             xreg=fourier(ts_training2,K=c(4)))
#forecast with ARIMA_fit
ARIMA_Four_forecast_2<-forecast(ARIMA_Four_fit_2,
                                xreg=fourier(ts_training2,K=c(4),h=nfor),
                                h=nfor)

#K=c(6)
ARIMA_Four_fit_3<-auto.arima(ts_training2,
                             seasonal=FALSE, 
                             xreg=fourier(ts_training2,K=c(6)))
#forecast with ARIMA_fit
ARIMA_Four_forecast_3<-forecast(ARIMA_Four_fit_3,
                                xreg=fourier(ts_training2,K=c(6),h=nfor),
                                h=nfor)

scores_ARIMA_Four_forecast_1<-accuracy(ARIMA_Four_forecast_1$mean,ts_testing2)
scores_ARIMA_Four_forecast_2<-accuracy(ARIMA_Four_forecast_2$mean,ts_testing2)
scores_ARIMA_Four_forecast_3<-accuracy(ARIMA_Four_forecast_3$mean,ts_testing2)
#temp
#create time series object 
#ts_temp_training<-ts(df_temp_preci[,2], frequency=12,
                     #start=c(2010,01), end=c(2020,09))

#ts_temp_testing<-ts(df_temp_preci[,2], frequency=12,
                     #start=c(2020,10), end=c(2021,10))

ts_temp_monthly<-ts(df_temp_preci2[,2], frequency=12,start =c(2002,02), end=c(2021,10))
ts_preci_monthly<-ts(df_temp_preci2[,3], frequency=12,start =c(2002,02), end=c(2021,10))

#preci
#create time series object 
#ts_preci_training<-ts(df_temp_preci[,3], frequency=12,
                      #start=c(2010,01), end=c(2020,09))

#ts_preci_testing<-ts(df_temp_preci[,3], frequency=12,
                     #start=c(2020,10), end=c(2021,10))

#ts_temp<-ts(df_temp_preci[,2], frequency=12,start=c(2010,01), end=c(2021,10))
#ts_preci<-ts(df_temp_preci[,3], frequency=12,start=c(2010,01), end=c(2021,10))

autoplot(ts_temp_monthly)

autoplot(ts_preci_monthly)

ts_temp_training2 <- subset(ts_temp_monthly, end=length(ts_temp_monthly)-nfor)
ts_temp_testing2 <- subset(ts_temp_monthly, start = length(ts_temp_monthly)-nfor+1)

ts_preci_training2 <- subset(ts_preci_monthly, end=length(ts_preci_monthly)-nfor)
ts_preci_testing2 <- subset(ts_preci_monthly, start = length(ts_preci_monthly)-nfor+1)
#correlation test 
cor_water_temp<-cor.test(ts_water_monthly,ts_temp_monthly,method = "spearman",exact = FALSE)
cor_water_preci<-cor.test(ts_water_monthly,ts_preci_monthly,method = "spearman",exact = FALSE)


#create a summary table 
summary_table_cor<-data.frame(
  Variable = c("Temperature", "Precipitation"),
  Correlation_Coefficient = round(c(cor_water_temp$estimate, 
                                    cor_water_preci$estimate), 5),
  P_Value = round(c(cor_water_temp$p.value, 
                    cor_water_preci$p.value), 5)
)

print(summary_table_cor)
##        Variable Correlation_Coefficient P_Value
## 1   Temperature                -0.00128 0.98438
## 2 Precipitation                 0.06314 0.33315
#decide not to use the exogenous variable 

#K=c(2)
#fit ARIMA with deseason time series 

xreg1 <- as.matrix(data.frame(fourier(ts_training2, K=c(2)),"preci"=ts_preci_training2))

ARIMA_Four_fit_XREG_1<-auto.arima(ts_training2,
                                  seasonal=FALSE,
                                  #combine fourier and exogenous variable time series  
                                  xreg=xreg1) 

#generate fourier terms
#fourier_terms_1<-fourier(ts_training2, K=c(2), h=12)
#generate forecast
preci_forecast_1<-forecast(ts_preci_training2, h=12)
#combine fourier terms and forecasts into a numeric matrix
xreg1_for<-as.matrix(data.frame(fourier(ts_training2, K=c(2), h=12), "preci"=preci_forecast_1$mean))

#forecast with ARIMA_fit_XREG
ARIMA_Four_forecast_XREG_1<-forecast(ARIMA_Four_fit_XREG_1,
                                     xreg=xreg1_for,
                                     h=12)


#K=c(4)

xreg2 <- as.matrix(data.frame(fourier(ts_training2, K=c(4)),"preci"=ts_preci_training2))

ARIMA_Four_fit_XREG_2<-auto.arima(ts_training2,
                                  seasonal=FALSE,
                                  #combine fourier and exogenous variable time series  
                                  xreg=xreg2) 

#generate fourier terms
#fourier_terms_1<-fourier(ts_training2, K=c(2), h=12)
#generate forecast
preci_forecast_2<-forecast(ts_preci_training2, h=12)
#combine fourier terms and forecasts into a numeric matrix
xreg2_for<-as.matrix(data.frame(fourier(ts_training2, K=c(4), h=12), "preci"=preci_forecast_2$mean))

#forecast with ARIMA_fit_XREG
ARIMA_Four_forecast_XREG_2<-forecast(ARIMA_Four_fit_XREG_2,
                                     xreg=xreg2_for,
                                     h=12)

#K=c(6)
xreg3 <- as.matrix(data.frame(fourier(ts_training2, K=c(6)),"preci"=ts_preci_training2))

ARIMA_Four_fit_XREG_3<-auto.arima(ts_training2,
                                  seasonal=FALSE,
                                  #combine fourier and exogenous variable time series  
                                  xreg=xreg3) 

#generate fourier terms
#fourier_terms_1<-fourier(ts_training2, K=c(2), h=12)
#generate forecast
preci_forecast_3<-forecast(ts_preci_training2, h=12)
#combine fourier terms and forecasts into a numeric matrix
xreg3_for<-as.matrix(data.frame(fourier(ts_training2, K=c(6), h=12), "preci"=preci_forecast_3$mean))

#forecast with ARIMA_fit_XREG
ARIMA_Four_forecast_XREG_3<-forecast(ARIMA_Four_fit_XREG_3,
                                     xreg=xreg3_for,
                                     h=12)

 
ARIMA_Four_XREG_1_score<-accuracy(ARIMA_Four_forecast_XREG_1$mean,ts_testing2)
ARIMA_Four_XREG_2_score<-accuracy(ARIMA_Four_forecast_XREG_2$mean,ts_testing2)
ARIMA_Four_XREG_3_score<-accuracy(ARIMA_Four_forecast_XREG_3$mean,ts_testing2)
#p=1, P=0 because seasonal=FALSE

#K=c(2)
NN_fit_1<-nnetar(ts_training2,
                 p=1,
                 P=0,
                 xreg=fourier(ts_training2,K=c(2)))

NN_forecast_1<-forecast(NN_fit_1,
                        xreg=fourier(ts_training2,K=c(2),h=12), 
                        h=12)

#K=c(4)
NN_fit_2<-nnetar(ts_training2,
                 p=1,
                 P=0,
                 xreg=fourier(ts_training2,K=c(4)))

NN_forecast_2<-forecast(NN_fit_2,
                        xreg=fourier(ts_training2,K=c(4),h=12), 
                        h=12)

#K=c(6)
NN_fit_3<-nnetar(ts_training2,
                 p=1,
                 P=0,
                 xreg=fourier(ts_training2,K=c(6)))

NN_forecast_3<-forecast(NN_fit_3,
                        xreg=fourier(ts_training2,K=c(6),h=12), 
                        h=12)


NN1_score <- accuracy(NN_forecast_1$mean,ts_testing2)
NN2_score <- accuracy(NN_forecast_2$mean,ts_testing2)
NN3_score <- accuracy(NN_forecast_3$mean,ts_testing2)
#Model1:ETS
#scores_ETS<-accuracy(ETS_fit$mean,ts_testing)

#Model2: ARIMA + Fourier, K=c(2) 
#scores_ARIMA_Four_forecast_1<-accuracy(ARIMA_Four_forecast_1$mean,ts_testing)

#Model3: ARIMA + Fourier, K=c(4)
#scores_ARIMA_Four_forecast_2<-accuracy(ARIMA_Four_forecast_2$mean,ts_testing)

#Model4: ARIMA + Fourier, K=c(6)
#scores_ARIMA_Four_forecast_3<-accuracy(ARIMA_Four_forecast_3$mean,ts_testing)

#Model5: Neural network, K=c(2) 
#scores_NN_1<-accuracy(NN_forecast_1$mean,ts_testing)

#Model6: Neural network, K=c(4)
#scores_NN_2<-accuracy(NN_forecast_2$mean,ts_testing)

#Model7: Neural network, K=c(6)
#scores_NN_3<-accuracy(NN_forecast_3$mean,ts_testing)

#Model8: ARIMA + Fourier + XREG, K=c(2) 
#scores_ARIMA_Four_XREG_1<-accuracy(ARIMA_Four_forecast_XREG_1$mean,ts_testing)

#Model8: ARIMA + Fourier + XREG, K=c(4) 
#scores_ARIMA_Four_XREG_2<-accuracy(ARIMA_Four_forecast_XREG_2$mean,ts_testing)

#Model9: ARIMA + Fourier + XREG, K=c(6)
#scores_ARIMA_Four_XREG_3<-accuracy(ARIMA_Four_forecast_XREG_3$mean,ts_testing)

#create a data frame to store the scores 
scores<-as.data.frame(rbind(sarima_score,
                            ETS_score,
                            SS_ll_score,
                            SS_llt_score,
                            SS_bsm_score,
                            scores_ARIMA_Four_forecast_1,
                            scores_ARIMA_Four_forecast_2,
                            scores_ARIMA_Four_forecast_3,
                            NN1_score,
                            NN2_score,
                            NN3_score,
                            ARIMA_Four_XREG_1_score,
                            ARIMA_Four_XREG_2_score,
                            ARIMA_Four_XREG_3_score))

#scores_NN_all<-as.data.frame(rbind(
                            #scores_NN_1,
                            #scores_NN_2,
                            #scores_NN_3))
row.names(scores)<-c("SARIMA",
                     "ETS",
                     "SS LL",
                     "SS LLT",
                     "SS BSM",
                     "ARIMA Fourier K=c(2)",
                     "ARIMA Fourier K=c(4)",
                     "ARIMA Fourier K=c(6)",
                     "NN, K=c(2)",
                     "NN, K=c(4)",
                     "NN, K=c(6)",
                     "ARIMA Fourier K=c(2), Preci",
                     "ARIMA Fourier K=c(4), Preci",
                     "ARIMA Fourier K=c(6), Preci")

#row.names(scores_NN_all)<-c(
                     #"NN, K=c(2)",
                     #"NN, K=c(4)",
                     #"NN, K=c(6)")

#create a comparable table 
kbl(scores, 
    caption = "Forecast Accuracy for Groundwater Level",
    digits = array(5,ncol(scores))) %>%
  kable_styling(full_width = FALSE, position = "center", latex_options = "hold_position")
Forecast Accuracy for Groundwater Level
ME RMSE MAE MPE MAPE ACF1 Theil’s U
SARIMA -37.75665 44.82988 40.73087 -17.71579 18.76675 -0.29561 1.42512
ETS -41.43256 48.61780 43.34761 -19.40871 20.08541 -0.27734 1.54607
SS LL -44.42955 51.01336 45.82536 -20.70269 21.19591 -0.27103 1.62979
SS LLT -49.08918 55.45828 49.17074 -22.78976 22.81858 -0.25569 1.77316
SS BSM -46.07920 52.56657 46.92513 -21.44167 21.74058 -0.27533 1.67848
ARIMA Fourier K=c(2) -37.24543 44.27194 40.05454 -17.48731 18.47993 -0.31483 1.40568
ARIMA Fourier K=c(4) -37.37725 44.44457 40.05086 -17.54926 18.49570 -0.30373 1.40961
ARIMA Fourier K=c(6) -38.27517 45.06588 40.60453 -17.93086 18.75396 -0.29797 1.43071
NN, K=c(2) -41.44304 47.88450 43.45199 -19.31467 20.02454 -0.28125 1.52564
NN, K=c(4) -37.07205 43.90341 40.93468 -17.25721 18.62450 -0.14014 1.41739
NN, K=c(6) -36.98488 43.71657 40.37459 -17.27148 18.47092 -0.25300 1.40162
ARIMA Fourier K=c(2), Preci -37.17543 44.22498 39.98923 -17.45795 18.45223 -0.31427 1.40402
ARIMA Fourier K=c(4), Preci -37.33546 44.41671 40.02878 -17.53163 18.48517 -0.30409 1.40867
ARIMA Fourier K=c(6), Preci -38.31966 45.10509 40.63914 -17.95084 18.77045 -0.29871 1.43185
#kbl(scores_NN_all, 
    #caption = "Forecast Accuracy for Electricity Demand",
    #digits = array(5,ncol(scores_NN_all))) %>%
  #kable_styling(full_width = FALSE, position = "center", latex_options = "hold_position")

#RMSE and MAPE
#NN, K=c(2) has the best performance
#forecast period 
forecast_start<-start(ARIMA_Four_forecast_1$mean)
forecast_end<-end(ARIMA_Four_forecast_1$mean)
ts_forecast<-window(ts_water_monthly, start=forecast_start, end=forecast_end)

#plot forecasting result
autoplot(ts_testing2)

autoplot(ts_testing2)+
  autolayer(NN_forecast_3, series="Neural Network c(6)", PI=FALSE)+
  autolayer(sarima_for, series = "SARIMA",PI=FALSE)+
  autolayer(ARIMA_Four_forecast_1, series = "ARIMA + Fourier c(2)",PI=FALSE)

#NN c(6) is the best model
NN_all<-nnetar(ts_water_monthly,
                 p=1,
                 P=0,
                 xreg=fourier(ts_water_monthly,K=c(6)))

NN_for_all<-forecast(NN_all,
                        xreg=fourier(ts_water_monthly,K=c(6),h=36), 
                        h=36)
resi_NN_all<-checkresiduals(NN_all)

## 
##  Ljung-Box test
## 
## data:  Residuals from NNAR(1,6)
## Q* = 29.995, df = 24, p-value = 0.1849
## 
## Model df: 0.   Total lags used: 24
autoplot(ts_water_monthly, series = "Observed Data")+
  autolayer(NN_for_all, series = "NN Forecast")+
  ylab("Groundwater Levels (feet)")

resi_ETS<-checkresiduals(ETS_fit)

## 
##  Ljung-Box test
## 
## data:  Residuals from STL +  ETS(A,N,N)
## Q* = 88.949, df = 24, p-value = 2.151e-09
## 
## Model df: 0.   Total lags used: 24
resi_sarima<-checkresiduals(sarima_for)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,1,2)
## Q* = 20.844, df = 21, p-value = 0.4685
## 
## Model df: 3.   Total lags used: 24
resi_SS_ll<-checkresiduals(SS_ll_for)

## 
##  Ljung-Box test
## 
## data:  Residuals from Local level structural model
## Q* = 57.431, df = 24, p-value = 0.0001459
## 
## Model df: 0.   Total lags used: 24
resi_ARIMA_Four_1<-checkresiduals(ARIMA_Four_forecast_1)

## 
##  Ljung-Box test
## 
## data:  Residuals from Regression with ARIMA(1,1,2) errors
## Q* = 20.827, df = 21, p-value = 0.4696
## 
## Model df: 3.   Total lags used: 24
resi_ARIMA_Four_2<-checkresiduals(ARIMA_Four_forecast_2)

## 
##  Ljung-Box test
## 
## data:  Residuals from Regression with ARIMA(1,1,2) errors
## Q* = 22.015, df = 21, p-value = 0.3987
## 
## Model df: 3.   Total lags used: 24
resi_ARIMA_Four_3<-checkresiduals(ARIMA_Four_forecast_3)

## 
##  Ljung-Box test
## 
## data:  Residuals from Regression with ARIMA(1,1,2) errors
## Q* = 21.316, df = 21, p-value = 0.4398
## 
## Model df: 3.   Total lags used: 24
resi_ARIMA_Four_XREG_1<-checkresiduals(ARIMA_Four_forecast_XREG_1)

## 
##  Ljung-Box test
## 
## data:  Residuals from Regression with ARIMA(1,1,2) errors
## Q* = 20.891, df = 21, p-value = 0.4656
## 
## Model df: 3.   Total lags used: 24
resi_ARIMA_Four_XREG_2<-checkresiduals(ARIMA_Four_forecast_XREG_2)

## 
##  Ljung-Box test
## 
## data:  Residuals from Regression with ARIMA(1,1,2) errors
## Q* = 22.112, df = 21, p-value = 0.3931
## 
## Model df: 3.   Total lags used: 24
resi_ARIMA_Four_XREG_3<-checkresiduals(ARIMA_Four_forecast_XREG_3)

## 
##  Ljung-Box test
## 
## data:  Residuals from Regression with ARIMA(1,1,2) errors
## Q* = 21.301, df = 21, p-value = 0.4407
## 
## Model df: 3.   Total lags used: 24
resi_NN_forecast_1<-checkresiduals(NN_forecast_1)

## 
##  Ljung-Box test
## 
## data:  Residuals from NNAR(1,3)
## Q* = 51.949, df = 24, p-value = 0.0007945
## 
## Model df: 0.   Total lags used: 24
resi_NN_forecast_2<-checkresiduals(NN_forecast_2)

## 
##  Ljung-Box test
## 
## data:  Residuals from NNAR(1,5)
## Q* = 43.333, df = 24, p-value = 0.009111
## 
## Model df: 0.   Total lags used: 24
resi_NN_forecast_3<-checkresiduals(NN_forecast_3)

## 
##  Ljung-Box test
## 
## data:  Residuals from NNAR(1,6)
## Q* = 43.068, df = 24, p-value = 0.009772
## 
## Model df: 0.   Total lags used: 24